#delimit;
clear all;
set more off;

** Close any log that is still open (for example from an earlier aborted run)
** and then open the log for this script. Only the close is wrapped in capture
** because it errors when no log is open. The open is deliberately not captured
** so that a failure to start the log stops the run instead of going unnoticed;
capture log close;
log using "13_Create_NAICS_Appx_Table.log", replace;
/*******************************************************************************************/;
** INPUT: Cleaned DAF classification data and the NAICS descriptors workbook; 
** OUTPUT: NAICS Table for Appendix; 
** LAST MODIFIED: JAG, 1/30/2022;
** RUN TIME: <1 minute using Stata 15 on computer with 2 x 2.60GHz processors and 64GB RAM;  													      	
/*******************************************************************************************/;

** Load the NAICS descriptor sheet that supplies the name for each NAICS code;
import excel "../Raw Data/Handcode DAF Classification/NAICS Descriptors.xlsx",
    sheet("NAICS") firstrow case(lower) clear;

** Every NAICS code must appear on exactly one row;
bysort naics: assert _n == 1;

** Recode system-missing values to zero in these three columns;
foreach v of varlist ncode_daf disc_or_disp ncode_foundation {;
    replace `v' = 0 if `v' == .;
};

** Log summary statistics for every variable as a quick sanity check;
summarize _all;

** Confirm that no two rows are identical on all variables. This also leaves
** the data sorted on all variables before it is saved;
bysort _all: assert _n == 1;
save "naics classification.dta", replace;

** Build the appendix table from the cleaned RA-coded data. The result has one
** row per NAICS code with count holding the number of observations that carry
** the code and total holding the number of observations in the cleaned file;
use "DAF classification clean.dta", clear;
gen count=1;
gen total=_N;
** total is constant so its mean within each code is just the overall _N;
collapse (mean) total (sum) count, by(naics);

** Attach the NAICS descriptors from the naics classification file;
merge 1:1 naics using "naics classification.dta";

** Drop descriptor rows for codes that never occur in the cleaned data. Codes
** that occur in the data but have no descriptor (_merge==1) are kept. The
** variable is spelled out in full so this does not depend on varabbrev;
drop if _merge==2;

** Largest categories first. naics is unique after the collapse so using it as
** a tie-breaker makes the row order reproducible from run to run;
gsort -count naics;

** Write the finished table into the log so that the run leaves a record of it.
** NOTE(review): confirm whether the appendix table should also be exported;
list, noobs;
log close;
